{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Datapoints"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from rdkit import Chem\n",
    "from chemprop.data.datapoints import LazyMoleculeDatapoint, MoleculeDatapoint, ReactionDatapoint"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Molecule Datapoints"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`MoleculeDatapoint`s are made from target value(s) and either an `rdkit.Chem.Mol` object or a SMILES string."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "mol = Chem.MolFromInchi(\"InChI=1S/C2H6/c1-2/h1-2H3\")\n",
    "smi = \"CC\"\n",
    "n_targets = 1\n",
    "y = np.random.rand(n_targets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d215b0>, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name=None, V_f=None, E_f=None, V_d=None)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MoleculeDatapoint(mol, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d21770>, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MoleculeDatapoint.from_smi(smi, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Hydrogens in the graph"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Explicit hydrogens in the graph created by `from_smi` can be controlled using `keep_h` and `add_h`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAPWUlEQVR4nO3dW0yT5x8H8LcIVU5ylCGKHGQcRVDAEzAmsoCTLUsWdrGsy5Il4C7WsQxWLly6C5dg2aHLki2wOFezK7bdFB1uBcEjKKAMx0EUEUEROcjGsVTa/8Xjv2NyKrxtn/fw/VxpAu030H7pe3h+j8RkMjEAALBaDrQDAADwG2oUAIAV1CgAACuoUQAAVlCjAACsoEYBAFhBjQIAsIIaBQBgBTUKAMAKahQAgBXUKAAAK6hRAABWUKMAAKw40g4A8K+jR4/evXs3Li7u/fffp50FwFISDMoDLvjjjz/eeeed/v5+8t+AgIDy8vLk5GS6qQAsgYN6oKy3tzcxMTEzM7O/v18ikWzevJlhmAcPHqSkpKSlpQ0NDdEOCLAM1ChQ8+TJk7y8vODg4KamJoZhQkNDW1paent7GxsbQ0NDGYY5f/68v79/Xl6e0WikHRZgUahRoOOHH37w8vIqKyszGo1ubm7Hjx/v6uratm0bwzAJCQldXV0//viju7v77OxsWVnZ+vXrv//+e9qRARaGc6Ngb3fu3ElPT+/p6WEYZs2aNYcPH/7mm28kEsn8r5ydnc3Lyztx4gT5NPrmm29+8cUX/v7+9k4MsCTUKNjP5OSkSqVSqVRTU1MMw6Smpv76668bNmxY+rsePnz4+uuvNzQ0GAwGV1fXgoKCoqKidevW2SUygAVMALZnNBrLy8uDgoLIq+7AgQN1dXUreoTbt2/n5OSQbw8MDNRoNDaKCrBSqFGwucbGxpSUFNKAO3fuvHDhwqofqrq6evv27eSh9u/f/+eff1oxJ8DqoEbBhh48eJCbm7tmzRqGYTZu3FhaWvrkyROWj2kwGEpLS8mpAAcHB5lM9ujRI6ukBVgd1CjYxMzMjFqtXr9+PcMwTk5Ocrn877//tuLjj4yMKBQKqVTKMIyXl1dxcbFer7fi4wNYDjUK1qfVardu3UoOvbOzs2/fvm2jJ+ro6Hj55ZfJE0VERJw+fdpGTwSwBNQoWFN7e/vBgwdJr0VGRv722292eFKdThcVFUWeNCMjo7W11Q5PCmCGGgXrGBkZkcvljo6O5ChbrVYbDAa7Pfv8cwijo6N2e3YQOdQosDX3mo+jo2Nubi6taz6Dg4NyuZxc0fLx8VGr1eyvaAEsCzUKrFRXV8fGxpID6vT09JaWFtqJTE1NTampqeb7q86fP087EQgcahRW6datW+b74cPCwsrLy2kn+g+tVhscHGy+zNXd3U07EQgWahRWbHx8XKlUrl27lmEYV1dXpVI5PT1NO9QCJiYmiouL3dzcGIZxdnZWKBRjY2O0Q4EAoUZhBYxGo0ajIcNBJBKJTCbr7++nHWoZfX19MpmMjD7ZtGmTRqMxGo20Q4GgoEbBUleuXNmzZw85TN61a9fly5dpJ1qBZ8KvdEU/wBJQo7A8YXygm52d1Wg0zz33HI8+SgMvoEZhKZOTkwI7vciXE7vAI6hRWJSAL3Z3dnZy+TYD4BfUKCxg7q2XO3bsEOqtl1VVVWTbEjIClQs3vQIfoUbhP4aGhkS1EIgswfL19aW+BAv4CzUKT5Fl6R4eHiJclj48PGweCODt7W3ngQDAd6hRMJlMJp1OFx0dLfIhSe3t7VlZWebxVJWVlbQTAT+gRsUOIzufodVqQ0ND7TAsFQQDNSpeGCC/GL1eb9PR/SAwqFExIjei+/n5mbczGhgYoB2Kc8hGUg4ODuaNpGZnZ2mHAi5CjYrO2bNnsbmm5RobG5OTk8mPKyEhgc22piBUqFERuXfvnkwmw1bvK2U0GsvLy4OCgsgq0pycnLt379IOBRwiMZlMDAjdxMRESUnJsWPHpqenXV1dCwoKioqK1q1bRzsXn0xOTqpUKpVKNTU15eLiUlhYqFAonJ2daecCDqDd42Bb5JNUYGAg
8/9PUj09PbRD8Vhvb695SsvmzZt5OqUFrAs1KmRXr17dt28f+XuZlJR06dIl2okEora2Nj4+nvxg09LSrl+/TjsR0IQaFab79++brzIHBATgKrPVzR27R+52ePjwIe1QQAdqVGjIaDt3d3eGYaRSqVwu/+eff2iHEqzHjx8rFAoyds/T07O4uBhj90QINSooWq02JCSEHGxmZ2ffuXOHdiJRuHnzZnZ2NvmxP//88xi7JzaoUYG4fv36Cy+8QN7J8fHxtbW1tBOJjk6ni4mJIb+CjIyMv/76i3YisBPUKO/NHW1HphMJe7Qdl5EpWZ6engzDODk55ebmDg4O0g4FNoca5bH5o+0eP35MOxQ8HbuHP2zigRrlKxxCclxbW1tmZib5BUVFRZ05c4Z2IrAV1Cj/zL2gER4eXlFRQTsRLOqZi35dXV20E4H1oUb5BLfX8BEZu4db0AQMNcoP80fb4WZvfsGCCAFDjfJATU1NXFyceelhc3Mz7USwSnOX5yYmJmJ5rjCgRjmNjLbDIAwhIcNitmzZgmExgoFBeRxFxrKR0XZkLBtG2wkJRhcKCu0eh2fN/7SCIcFChUHawoAa5ZaGhoa5W1ZcvHiRdiKwOWzrwneoUa7ABmpihk0GeQ01St/c7XzJfYXYzlec5m55Te4LxpbXvIAapUyr1YaGhpIDOqxyAZPJ1NHRcejQIfKSCA8PP3XqFO1EsAzUKDVtbW1ZWVnk3RIZGVlZWUk7EXCITqeLjo4mL4+MjIzW1lbaiWBRqFEK5k8AMhgMtEMB58yf4DU6Oko7FCwANWpXBoOhtLTU19eXYRhHR0fMo4RlzZ0n6+Pjg7F7HIQatZ+qqqpt27aRw7QDBw7cuHGDdiLgjWvXrqWmppIXz44dO86dO0c7EfwLNWoPnZ2dOTk55D2AvXpg1bRabXBwsPmCJPba4gjUqG2NjY0plUoy2s7NzU2pVGK0HbBBdn51c3NjGMbZ2VmhUGDsHnWoUVvBPuZgO319feaZNZs2bcLMGrpQozZRX1+/e/ducvC1a9euuro62olAgK5cubJ3717yMktKSrp8+TLtRCKFGrWy3t5ejLYDuzEajRqNxt/fnwyykclk/f39tEOJDmrUaiYmJswnrVxcXBQKxdjYGO1QIArj4+NKpZLM2XN1dVUqlVNTU7RDiQhq1Dq0Wm1QUJD5Emp3dzftRCA6t27dMt8QEhYWhhtC7AY1ylZTU1NKSgp57e7cufPChQu0E4GoVVdXx8bGkhdkenp6S0sL7UTChxpdPTLajiwv8fX1xfIS4AiyWG7Dhg3mxXKPHj2iHUrIUKOrQRY7k9F2ZLEzRtsB14yMjMjlckdHR4ZhvLy8MLrBdlCjK6bVardu3WoevdPW1kY7EcCi2tvbDx48SF6uERERp0+fpp1IgFCjK4BXJPAU/vbbFGrUIjg+Ar7DmSjbQY0uA2frQUhwXdQWUKNLeebeEWzZCMKAu/SsCzW6MNzJDIKHNSPWghp9FtbVgXhgBbNVoEb/hSkPIE6Yp8MSavSpq1evYuYYiNnc6Y67d+/GdEfLoUYxARfgKcwaXx1R1yj2YwCYDzvfrJR4axS7gwEsAfswWk6MNYq9agEshF3BLSGuGh0aGpLL5WQJh4+PD5ZwACyLLOTz9fU1L+QbHBykHYpbxFKjZEGxh4eHeUHx6Ogo7VAAvDE8PGz+COLt7Y2xEnOJokZ1Ol10dLR5vE1rayvtRAC81NbWlpmZSd5KkZGRlZWVtBNxgsBrtKOj49ChQ+S3Hh4efurUKdqJAHhPq9WGhoaaL892dXXRTkSZYGt0ZGREoVBIpVKGYTw9PYuLi/V6Pe1QAAKh1+vNY/ekUqnIx+4JsEbJLcR+fn7mW4gHBgZohwIQoPv37+fm5jo4ODAMs3HjxtLS0tnZWdqhKBBajZ49e3b79u3kcGP//v3Nzc20EwEIXENDQ3JyMnnTJSQkXLx4kXYiexNOjd67d08mk5HfZWBgoEaj
oZ0IQCyMRmN5efmWLVvIWJ+cnJyenh7aoexHYjKZGJ6bmJgoKSk5duzY9PS0q6trQUFBUVERmXQHAHYzOTmpUqnIO9HFxaWwsFAs70TaPc6KyP8GAnDQ3ONCkYzd43GNNjQ07Nu3j/y2EhMTL126RDsRADxVU1MTFxdH3p4vvviisK9S8LJG514fDAgIEO31QQAuE889Mzw7NzozM/Pdd9998sknY2NjUqn08OHDR48edXd3p50LABY2OjpaXFz81VdfzczMeHp6FhUV5efnkyl8C6qtrT1+/Dj591tvvWVeNLW08fHx9957j/w7IiLiyJEj7JOvAO0eXwGtVhsSEkJiY+0EAI/cvHkzOzubvHnDw8MrKioW+8rS0lJzO3355ZcWPv7g4KD5u1JSUqyU2lIO9qzsVWtvb8/Kynr11Ve7u7ujoqLOnDlTUVFhXo4GABxHqlOn08XExHR2dr7yyisvvfRSa2sr7VzWwfUaHRkZ+eCDD2JjY3///XcyV+bGjRsWfs4HAE7JyMi4du3a559/7uHhUVVVlZ6ertfraYeyAkfaARZlMBi+/fbbTz/9dHR01MnJ6d133/3ss8/I0EMA4CmpVPrRRx/JZLIjR47ExsYucZKURzhao1VVVfn5+eQzf0ZGhlqtjomJoR0KAKzDz8+vrKyMdgqr4dxB/dzzJmQHGHI+hXYuAICFcejTKLkxQq1W6/V6S26MAADgAk7UqNFo/Omnnz7++OOBgQFym25JSQnZLBsAgOPo1+i5c+fy8/Obm5sZhklLS1Or1fHx8bRDAQBYiua50b6+vrfffptMBSUjDGpqatChAMAvdD6NkoFaKpVqamqKDNRSKBTOzs5UwgAAsGHvGjWZTL/88kthYWFPTw8ZbVdSUhIUFGTnGADAfT///HNHR4clXzk9PW3rMEuwa40ODw9nZ2fX19czDJOUlPT111/v3bvXngEAgEfq6urq6upop1ieXc+Nent7Ozk5ka2v6uvr0aEAIAB2/TQqkUhOnjzp6+vr5uZmz+cFAD7as2dPbGysJV+p1+tPnjxp6zyLsfe50eDgYDs/IwDw1BtvvPHhhx9a8pVDQ0MUa5Rzi0EBAPgFNQoAwApqFACAFdQoAAArqFEAAFZQowAArKBGAQBYQY0CALCCGgUAYAU1CgDACmoUAIAV+puIAACYhYSEvPbaa+TfYWFhFn7X2rVrzd8VHR1tk2SLk5hMJjs/JQCAkOCgHgCAFdQoAAArqFEAAFZQowAArKBGAQBYQY0CALCCGgUAYAU1CgDACmoUAIAV1CgAACuoUQAAVlCjAACsoEYBAFhBjQIAsPI/mfYDNf0DrLIAAABlelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAGYgZgLiBkZGBQ0gzcjIxpAAEmOC0IzM3AyMDIxMDCIg1eJ6ICG43oduy+yBWvchmWcPIoDi+2HiYgCzdgzCMqvRuwAAAKh6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJyNUEEKwzAMu+cV+kCD61BYjk1S1jKawJbtD7vv/8yhZE0PG7N9kIVkhBVKXcPl+cKnOCgF0I+x1uJhiEitKABuOi8RPo+uMj7dY77BgMUhfVSOOa2V6TGj12wtmRM60jzIYXFoog1UIcM3rB7oi86Irvvr4hTDIcoWzqUY9nCleY8gC0zrb9Vlr08QrN4jl0NZa+vfuwAAAEF6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJyL9oh1dlao0TDUM7K0NDDR0TXQMzLVsTbQMdADUrqowpo1APtNChCjpyj6AAAAAElFTkSuQmCC",
      "text/plain": [
       "<rdkit.Chem.rdchem.Mol at 0x7f64a9d21850>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MoleculeDatapoint.from_smi(\"[H]CC\", y, keep_h=True).mol"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAGb0lEQVR4nO3cP0iV/x7A8a+npFq0SLRFHRQ1gkAHq6WlRce2iqLWIhCC1oTW/i0/bBUq22pMgtDTUjgkRKFJQihGSlG4WITn3EHu797LOZn66fcc9b5e4/N8h494ePscz0erisViAmCjcpUeAGBrk1GAEBkFCJFRgBAZBQiRUYCQnZUegK1ncXHx9u3bKaXLly/X1dWVHnj//v39+/dzudy1a9cyn66869evFwqFs2fPtra2lt79/PnzX3/9lVK6cuVKTU1N5tOxtVXZG2W9Zmdnm5qaUkoTExMdHR2lB4aHh3t7e3O53PLycubTlbdjx45CofDkyZOenp7Su5OTkwcPHkwpzczMNDY2Zj4dW5s39QAhMgoQIqMAITIKECKjACEWnti4R48eHThwoPT6mzdvsh9mLZ4+ffrx48fS658+fcp+GLYNC0+s298LT6vbhAtPvz1m4YkN8DTKxl28eHH//v2l16enpx8+fJj9PL91+vTplpaW0utfvny5e/du9vOwPXgaZd2s38N/8xETQIiMAoTIKECIjAKEyChAiIwChNgbZd3q6+tHR0dTSs3NzWUPHDlyZHR0tKqqKtOxVjUyMlIsFg8fPlz2bnNz88pXVF9fn+lYbAv2RgFCvKnnD7h161ZXV9fQ0FClB1mroaGhrq6uW7duVXoQtgMZ5Q+YnZ0dHx9fWFio9CBrtbCwMD4+Pjs7W+lB2A5kFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCZBQgREYBQmQUIERGAUJkFCBERgFCdlZ6ANL8/Pzw8HBK6dy5c7lcmR9sU1NTL168qKmpOXnyZObTURmPHz9eXFw8duxYW1tb6d1CoXDv3r2UUk9PT0NDQ+bT8T9ktPLevXt34cKFlNKpU6d27dpVeuDZs2eXLl1qaWmR0f8fV69enZ6eHhgYKJvRnz9/rrxm8vm8jFacN/UAITIKECKjACEyChAiowAhPqnfRPL5fHV1den1qamp7IdhM5iamhoZGSm9/vPnz+yH4ZeKVFo+n1/Ld6qlpaVYLI6Pj//TLwkqa3x8vFgstrS0rOVwPp+v9OuXoqfRTeTMmTO/Wr8fGxvLfh4qrru7+1fr90NDQ9nPQ3mV7jj/eRr9/v172QMDAwPp30+jm1NfX19K6c6dO5UeZK3u3LmTUurr66v0IL+08jQ6MDBQ9u73799XXjOeRjcDHzEBhMgoQIiMAoTIKECIjAKEyChAiL3Rymtvbx8cHEwplf0TppTSiRMnBgcHa2pqMh2Lirpx48bKv20ue7e6unrlNdPe3p7pWJQjo5XX0NBw/vz5VQ60tbWV3cFmG1v9X3TncrnVXzNkyZt6gBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUYAQGQUIkVGAEBkFCJFRgBAZBQiRUf6AxsbGzs7O+vr6Sg+yVvX19Z2dnY2NjZUehO2gqlgsVnoGgC1sZ6UHYOv58ePHy5cvU0rd3d179uwpPfD169fXr19XVVUdP3488+nKe/78ebFYPHz48L59+0rvLi0tjY2NpZSOHj26a9euzKdja/M0yrrNzs42NTWllCYmJjo6OkoPDA8P9/b25nK55eXlzKcrb8eOHYVC4cmTJz09PaV3JycnDx48mFKamZnxTp/18rtRgBAZBQiRUYAQGQUIkVGAEAtPbNzc
3Nzu3btLr8/Pz2c/zFrMz89/+PCh9Prc3Fzms7B9WHhi3f5eeFrdJlx4+u0xC09sgKdRNq6rq6vs+v23b9/evn2b/Ty/dejQob1795ZeX1paevXqVfbzsD3IKBv34MGDVdbvs5/nt27evLn6+j1sgI+YAEJkFCBERgFCZBQgREYBQmQUIMTCE+tWW1vb39+fUqqrqyt7oLW1tb+/P5fbRD+k+/v7C4VCa2tr2bt1dXUrX1FtbW22c7Ed+CsmgJBN9LwAsBXJKECIjAKEyChAiIwChMgoQMi/AJsvobhUBxZMAAAAhnpUWHRyZGtpdFBLTCByZGtpdCAyMDIzLjA5LjUAAHice79v7T0GIOBnQAAOIGYH4gZGNgUFIM0CpRgZNEDSjMTS3AyMDAxMDAzMQL0MjKwMjGwMjOwMIiA58SyQAiQLHfYD6SUQroM9gi1w4NRJY1WoOFDNAXsk9n6oGgdUvTBxMBusXgwAriwUsztESVUAAADZelRYdE1PTCByZGtpdCAyMDIzLjA5LjUAAHicjZJBDoMgEEX3nOJfQIMooksV0zaNmLS2d+i+909n2uBomxoHFvPhzQA/KHBc/PnxxBzGKwXojVnXNe651loN4ARtfzgFdFPTxpVuvIXpigqOKmisyWYah7iSoUOiU2epn+ZMvwOSRNIQuQvMcURiUmO/9n/AgsG5ZZbaf6BdgckGWRI5H711SUfgro7VEtzg+uBXxn6sbsfgxWoeRvxkmYtrLAvxhqUVB6gSpTyTpZPHsKyWV1kezDr+DsrVC3NxdbukCdmpAAAAeHpUWHRTTUlMRVMgcmRraXQgMjAyMy4wOS41AAB4nIv2iHXWiPaI1QQTSEwgVqjR0DXSMzLVMdCx1jXQM0diGOqZwpi6YDZMGqYeXQrE0tRJLMnPDSjKL7Ay0Mss9swtyMlMzizRM7QyQuUao3JNUblmqFxzFG4NAG7AOeL/jG0zAAAAAElFTkSuQmCC",
      "text/plain": [
       "<rdkit.Chem.rdchem.Mol at 0x7f64a9d21d20>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MoleculeDatapoint.from_smi(smi, y, add_h=True).mol"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Other datapoint properties"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Datapoints can be individually weighted in the loss function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d21bd0>, y=array([0.30484272]), weight=0.5, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MoleculeDatapoint.from_smi(smi, y, weight=0.5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A string identifier (e.g. a name) can be assigned to a datapoint. If a SMILES is used to make the datapoint, the name defaults to the SMILES, but this can be overridden."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d215b0>, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Ethane', V_f=None, E_f=None, V_d=None)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MoleculeDatapoint(mol, y, name=\"Ethane\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d22180>, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='Ethane', V_f=None, E_f=None, V_d=None)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MoleculeDatapoint.from_smi(smi, y, name=\"Ethane\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Extra features and descriptors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Extra datapoint descriptors (like [molecule features](../featurizers/molecule_featurizers.ipynb)) will be concatenated to the learned descriptors from message passing and used in the FFN. They are called `x_d`. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d223b0>, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=array([0.79952846, 0.57058144, 0.61951421]), x_phase=None, name='CC', V_f=None, E_f=None, V_d=None)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_extra_descriptors = 3\n",
    "MoleculeDatapoint.from_smi(smi, y, x_d=np.random.rand(n_extra_descriptors))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Extra atom features, bond features, and atom descriptors are called `V_f`, `E_f`, `V_d`. In this context, features are used before the message passing operations, while descriptors are used after. Extra bond descriptors aren't currently supported as aggregation ignores the final bond (edge) representations. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MoleculeDatapoint(mol=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d22420>, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name='CC', V_f=array([[0.3860953 , 0.64302719, 0.05571153],\n",
       "       [0.06926393, 0.90740897, 0.95685501]]), E_f=array([[0.55393371, 0.29979474, 0.07807503, 0.73485953]]), V_d=array([[0.10712249, 0.33913704, 0.37935725, 0.74724361, 0.49632224],\n",
       "       [0.8496356 , 0.31315312, 0.14000781, 0.58916825, 0.16698837]]))"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_atoms = 2\n",
    "n_bonds = 1\n",
    "n_extra_atom_features = 3\n",
    "n_extra_bond_features = 4\n",
    "n_extra_atom_descriptors = 5\n",
    "extra_atom_features = np.random.rand(n_atoms, n_extra_atom_features)\n",
    "extra_bond_features = np.random.rand(n_bonds, n_extra_bond_features)\n",
    "extra_atom_descriptors = np.random.rand(n_atoms, n_extra_atom_descriptors)\n",
    "MoleculeDatapoint.from_smi(\n",
    "    smi, y, V_f=extra_atom_features, E_f=extra_bond_features, V_d=extra_atom_descriptors\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Reaction Datapoints"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`ReactionDatapoint`s are the same as for molecules except for:\n",
    "1. extra atom features, bond features, and atom descriptors are not supported\n",
    "2. both reactant and product `rdkit.Chem.Mol` objects or SMILES are required"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ReactionDatapoint(rct=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d22570>, pdt=<rdkit.Chem.rdchem.Mol object at 0x7f64a9d22490>, y=array([0.30484272]), weight=1.0, gt_mask=None, lt_mask=None, x_d=None, x_phase=None, name=None)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep the atom mapping for hydrogens\n",
    "rct = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3])[F:5]\", sanitize=False)\n",
    "pdt = Chem.MolFromSmiles(\"[H:1][C:4]([H:2])([H:3]).[F:5]\", sanitize=False)\n",
    "Chem.SanitizeMol(\n",
    "    rct, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n",
    ")\n",
    "Chem.SanitizeMol(\n",
    "    pdt, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_ADJUSTHS\n",
    ")\n",
    "ReactionDatapoint(rct, pdt, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The SMILES can either be a single reaction SMILES 'Reactant>Agent>Product', or a tuple of reactant and product SMILES. Note that if an Agent is provided, its graph is concatenated to the reactant graph with no edges connecting them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAbvElEQVR4nO3daUAUV7oG4LebpgEB2SEBATUMyhoX3GIQFRMVN1wgk2hcogYSrqJoEAlqDDruDksSjNuoKI6oMUa8LjioMSIiQb24sgVZBBQaRGiQbrruj4oMYdeiabv5nl/WqdPF15i8nqo6dYrHMAwIIYS8Lr6iCyCEEOVGMUoIIZxQjBJCCCcUo4QQwgnFKCGEcEIxSgghnAgUXQDh5Ny5c2KxePDgwRYWFk33FhcXJyYmCoXCCRMmtHIQiUSSkJCQmZlZWVlpZmY2atQoa2truZVMiKrh0bxRpWZpaZmfn3/kyBFvb++me8+cOePh4aGnp1deXt7SEfbv379y5crCwsL6FjU1tc8///yf//ynhoaGXIomRLXQaLRL2759+7Jly3g8nqen56hRo4RC4cWLF2NjY6OiohiGiYqKUnSBhCgBitGuq6ysbM2aNXw+//Dhw/WDWV9fXzs7u7Vr1+7atSsoKIjO7glpE91i6roMDAxu3ry5b9++RhcEAgMD1dTU6urqLl++rKjaCFEiNBrt0mxsbGxsbBo1duvWzdzcPC8vr6SkRCFVEaJcaDTahfzxxx8+Pj4+Pj4VFRWtdGMYprS0FICRkVFnlUaIEqPRqCq4evVqXV1d0/bbt2833CwuLt65cyeAtWvXdu/evaWjJScni8ViAO+9915HV0qICqIYVQURERERERFtdtPX1x87diyA1mcybd68GYCbm9vf/va3jqqQEBVG80aVGztvdPr06fb29k33ZmVlxcTEtD5vtJHo6OjZs2cLBIJr1665uLh0aLGEqCYajaoCb2/vlqbfx8TEtP84ly9f9vX1BbB161bKUELaiW4xkT9dvHhxwoQJYrE4NDTU399f0eUQojQoRgkA7N27d9y4cVVVVatWrQoJCVF0OYQoEzqp7+pqa2sDAwPDw8O1tLSio6NnzZql6IoIUTIUo11IVlbW2rVrAURGRurp6QHIzc39+OOPExMTe/To8dNPPw0aNEjRNRKifChGu5CnT59GR0cD2Lx5s56e3p07d1xdXdmb+EKh8IsvvmjU38XFZceOHQoolBClQjHahRgaGnp6egLQ1NQEkJOTUz8RKjs7u2n/VqboE0Lq0bxR5VZWVlZXV9e9e3ehUNh0r0QiefbsGZ/PNzQ0bHZvZWVlKwcXCAS6urodVishKopiVKUwDCOTyfh8Po/HU3QthHQVNOFJpWzcuFEgEAQHByu6EEK6EIpRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTgaILIKQNNTU1z58/V1dX19fXb7ZDWVmZVCrV1dXV1NRs/VCPHj3Ky8sDYG1tbWlp2fG1KrO6ujqRSATA2NiYx+M17VBVVSUWizU1NXV1dds8WnV1dVZWlkgkMjMzs7a2bvOvRqnRaJS86Xbv3m1qauru7t5SBzc3N1NT0/3797dykGPHjtnZ2fXs2dPV1dXV1dXKysrBweHUqVNyqFdZPXjwwNTU1NTU9Pnz5812CA4ONjU1XbhwYevHSU9P9/b2NjY2dnJycnNz69u3r4mJia+v74sXL+RQ9RuBRqNE9e3du3f+/PkAhg4d6ubmxjBMQkJCSkrKlClTDh48+Mknnyi6QNVx/vx5T0/P6upqHR0dDw8PCwuLJ0+eJCYmFhUVaWhoKLo6eaEYJSpOKpWuWLECgI+Pz44dO9hGhmHmzZu3f//+5cuXf/TRR2pqagqtUUXk5eXNmDGj
urp66tSpu3fvNjQ0ZNtra2vFYrFia5MrOqknKi4rK6ukpATA559/Xt/I4/F8fX0BFBYW5uTkKKg0VbNq1arnz587OTkdOXKkPkMBCIXClq5rqwaKUaLi6s8lq6urG7bX30XR1tbu7JpUUVVV1bFjxwCsWLFCXV1d0eV0KopRomouXLgQHR1948YNdrNHjx7m5uYAVqxYUV5eXt/tyJEjAIYNG/bWW28ppE5ll5aWFh0dHRcXx26mpKRUVVXxeDwPDw/FFtb56NooUQ4SiYSdq9TsroabGzZsSEhI8PPzGzRoEACBQLBp06bZs2dfvXq1T58+CxcunDNnTlJSUnh4uLm5+b59+zqheOVSUFDw7Nmzpu2VlZUNN0+cOLFmzRpHR8eJEycCSEtLA2BhYaGpqfn9998fO3YsIyNDR0enf//+ixcvHjZsWOcUrxAUo0Q5pKWlWVlZtaenlpaWtrZ2w/vCs2bNqq2tnT9//pMnT9avX/+Pf/wDgKWlZVJS0ttvvy2vipWWvb19e7oJhUJtbe1u3bqxm+ycU4Zh+vfv//DhQwDdunUrKCh4+PDh0aNHw8PD/fz85FezYlGMEuWgq6vLji6bSk5ObjhQqj/NrJeYmLh69Wo+n//FF1+UlJT88ssv1dXVubm5o0ePjo2NdXJykmPdSsjNza3ZqQsZGRkNTwiCgoKCgoLqN9m/goKCAjs7u0OHDk2ePFlHRyc3N3f58uVHjx5dsmSJq6urs7NzJ9Tf+VQ2Rvfs2VNUVDR+/PgBAwY03VtYWLh3714AwcHBzT6wUU8kErHnfdOnT7e2tpZPse1VXV198eLF69evl5SUCAQCS0tLDw+Pdo4dWBKJJCIiQiqVAujXr9/YsWPlVmzzUp8/T6yoaGnvZGNjqyazCxkAgH7Pnp4HDjTa5aav76yt7ezszJ5RNuvRo0cTJkwoLy/fu3fvvHnzAFRUVMTGxq5bt+7Bgweurq537tzp0aPH634hFfTLL7907969abu/v39ERERLn2KH/z179kxJSakfolpZWR08ePDWrVsZGRm7d+9u5eNKTWVjNDIy8vbt24aGhs3GaH5+fkhICIAVK1YIBM3/ErKzs8PDw/fs2VNVVQXAyMhozpw5cq25dbt27QoJCXny5EnDxq+++mrGjBlRUVHGxsYApk+f3rdv3z59+rR0kM2bN7NfHICvr2/nx+j/VVXtKypqae8AXd2mMVojkwEolUj2N/mgQ7duaOs+e1hYWHl5+bBhw9gMBdC9e/cFCxZ4enr269evoKBg+/bt27dvf+VvQv7KwMAAQHV1dX2GsoRCobu7e0ZGRiv/1Ck7lY1RLgoLC/38/E6ePCmTyTQ0NDQ1NWtqahRb0sqVKzdu3AjAwcHh008/7du3r0QiSUlJ2bVr17Fjx/7444+kpCSBQGBra2tra9vSQR4+fLhu3TqhUPj+++8nJCR0YvmN2WhpzWvu/ritllbTxqq6OgBqPN7G3r0b7erfjoe7b926BWDIkCGN2o2NjSdPnhwVFXXnzh225fbt2++++247yifNsLOzA/DkyROxWNwoSdlNmUymmMrkj2K0Gfr6+jdu3PDw8PDy8poyZcro0aNTU1MVWM/Zs2c3bdoE4MsvvwwPD68fPs+YMWPp0qVeXl5BQUEtjanryWSyhQsX1tTUrFq1SiQSKTZGDdXVxzaYnt2653V1ANR5vDEGBq/xs9jfTFFzQ+CnT5/i5bzRo0ePent7T5w4MTIysmfPnq/xg5SRWCyOjIxMTEw8efIkx0MNHTpUKBTW1tZeuHBh8uTJDXdduXIFQCsnScqO5o02Q0tLKy8v79SpU7Nnz9bT01N0OVi3bh3DMIMHD46MjGwUl2ZmZr/++mt7Zur9+OOPV65csbGxCQ4OllulcsGORgWtXsJu6PTp0zt37kxMTGQ3R4wYASAuLu7+/fsNu6Wnp585cwYAu+hJUVGRlpZWXFycg4NDaGhoo7n6qodhmJiYmD59+gQF
BZ06dap+mm37paam7ty5k51yD8DAwGDKlCkAQkJCGi5usmPHDvbgs2bN6qDa3zgUo2+6vLy8q1evAli2bBmf/5p/X48fP2bTMyoqSumWLKt8xRjdvn27j49PTEwMu7lo0SJra+vKysrhw4d/++23586dS0hI2LRp0/Dhw6uqqpydnT/77DO2W3p6+qefflpdXb169WpbW9sDBw4wDCOnL6VYqampI0aMmDlzZn5+/oABAy5fvtzSLIhWxMXF+fj4rF27tr5lw4YNBgYGaWlpzs7OISEhERER06ZN+/LLLwHMmzeP/fdMJVGMAkBWVha7flphYaGia2ksOTmZ/cOYMWNa7ymRSMaPHz9+/PiUlJRGu/z8/MrLy2fNmtXmQRTujEj0yf37/pmZ9S31o9GCFy9+LS//39LS358/l7YccPr6+sbGxvVrYurr61++fNnd3b2srGzNmjXjxo1zd3cPCgoqKSmZOnXq+fPn6y/k9ejR48CBA9euXRsyZEh+fv6cOXOGDh2alJQkz6/b2QoLC318fAYPHvzbb78ZGxuHhYUlJye7urq+xqG0tbWNjY0bPjv/zjvvxMfH29nZ5eTkrF+/3t/f/8SJE0KhMDAw8Mcff+y4L/HGUfFro6dOnXr8+HHT9kZxWVlZ+dtvv6HJY9etuHfv3t27d7lX2AobG5v+/fvn5+cDMDExMWzrYqJMJjt79iyARYsWNWyPjY39+eefDQ0Nt23b1tJn79y50+ict02G7u5Mu0eIAMw1NOxfBlamWByYldWow0Rj4xF6emUSSbpY/FworG83GzHinW3bdPT0pry8FwTAUCD4nx49JhsZAYiIiHj27Fm/fv3YXcePH290ZGtr6wsXLty/f//KlSsFBQU8Hs/c3HzMmDG9m9yzAjBkyJDExMSDBw8GBgYmJycPHz585syZW7ZsMTMza/+XfQNJJJIffvhh9erVFRUV6urqfn5+oaGhDSc2WVlZ/fzzz3h5R6iphQsXjh49un5y2LJly5YtW9aoz8CBA9PS0hITE2/duvXixQtzc/MPP/yQnUaiwlQ8Rs+cOcNe/2qdiYnJ0qVLAbR/HZrY2NiGpzPy4Ofn991337GP5bVnvXE1NTV2EaOGT/uIRKLFixcD2LRpk6mpaUufPXToEDsToP2G//57zauc804yMlrz8taNSCpNaPB4O+tdHR3o6Tnq6Cx4++3uDaZ/f2Bv/9jIyK5bNydtbX2B4IlEEi8SZVRXf5uT80Im8zIxGTlyZHsKsLOzY+8mt4nP58+ePXvatGlbt27duHFjdHT0Tz/9tHz58pUrVyrpopmnTp1aunRpVlYWgDFjxkRERDT9Vejq6rIXN1vi6Ojo6OjY5s9SU1Njz+24FKxcVDxG58+f/8EHHzRtz87Obninxdzc/FVnDjo4OMyYMYNrfa1iZ7yy/9/W1ta22V8gEDQ9dVq2bFlxcfH777/PrlvcEicnp1f9OrZ6etJXGY3aN5jgaa+tvbzJOzzMhUIAztrazn+dCjrdxGS6iUnDlrlvvbU1Nzf26dPI/HwPQ0Nt+awWqqOj880333zyyScBAQGnT59eu3bt4cOH94WHDxs3Th4/Tk4ePHgQEBDADib69Omzffv2Lrh0iNwxKoqdAPjDDz80u7f+gqNEImnzUGyc7du3r6NrbJedO3cC0NTUbE+pjSQkJPB4PIFAcOvWrYbt7NPNvr6+HVdme/2rsHBgSsoX6ekcj1MhlbqkpAxMSfmPSNQhhbXuwoULjo6OAoHgjo0N4+7OpKV1wg/lSiRiFi8+PWoUAAMDg7CwsNf4T4i0h4qPRlUA+xhyTU3N3bt3X2lyuFQqXbBgAcMwRkZG69evb7jr5s2bAOLj4729vT08PObOnduhJXcGXTU1E6HwSW1t0V+Xd5ITd3f31NTUxP37Hb76CpmZGDAAX36JNWvwWlNZ5U4qRVQUvvkGItF4ff1VAQFLvv66zWvr5LVRjL7p+vXrZ2BgUFZWdvjw
4VeKUbFYnJ2dDaC4uPjo0aNNO2RlZWVlZVlYWHRYrZ1IxjDPpFIA+m09d9BR1NXV3RYswLRpWLsW33+P8HBER2P1avj5obNqaJeEBCxZAvbJy9GjeWFh39LaK3JGE57edBoaGuzD4BEREffu3Wv/B7W1tVNa4OXlBWD69OkpKSkBAQHyKr2D3Hj+nE3MhpIqKl7IZHzAsZPXrjc0RHg40tIwdixEIixZAicnnD3bqTW0JDMT3t5wd0daGmxsEBuL//wHlKHyRzEKABkZGS4uLi4uLgUFBYqupRmrVq2ytLSsrq4ePXr0iRMnGj6bnJWVtXLlSvYkXSKRjBw5cuTIkdevXwegpqY2sAXsLXsTE5OBAwe+aa9rjystnX737hfp6exmckWFf0bG7AcPLpSV1TIMABlwvqxsdU4OgA8NDZsuZdIZ7Oxw9ix++QW9e+PBA4wfj0mTkJ2tgEpYVVX45hs4OuLoUWhrY80apKXBy0th9XQxb9LJiOKIxeLff/8dQP2rtA8cOLBkyRL2zxUVFQD8/PzYSVGfffbZ1q1bO7M8fX39c+fOjR8//tGjR9OmTTMxMbG3txcIBJmZmbm5uQzDVFRUfP/99zKZ7PLlywBKS0s7s7yOVSGVPqqpqX35T0VvLS0bLa17YnFQdraQzzdTVy+RSKplMgADdXW/VuzShZMmYexYREVh1SrExeH8efj6IjQUza0yJy8Mg+horFiBoiLwePj0U2zeDHotSudS2Rg1MjIyNTVtaSKxuro6OyJjFxs1MzP7+uuv8XKxr0aaXXuxk9nZ2aWlpYWFhf373/++d+8eG5d8Pt/W1nbSpEnsfHuBQODv7w+gV69erR9NS0vLwMCgpV+OXHVXU7PQ0DBp+ZVn/XR0/CwsdF/OYTJWV9/Xt+8ZkeiMSJRWVZX34oUmnz9AV3eioeFEY2PFn0wJhfD3x4wZCArCoUOIiMDx49i0CTNndsZPT07GkiW4dg0ABg1CeDhU+l0dbyweo6JPDauwysrKkpIShmHeeustreZWllNhEoZRf5XJqp0qJQX+/khMxOefo3OefRwyBMnJsLTEpk34+9/xxv5mVB3FKCEdhz3FHjcOLT8w1pGuXcPZs1ixAoo4sSD1KEYJkb+6Oty9i8ePIZXC2BjOzm0HX3U1Wl8oR1cXf324iyiKyl4bJeSNUFqK9euxfz9Eov82amjAwwOhoXBwaPGD169j1KjWjjxnDujt0G8GilGVkpSUdOnSpWHDhrm5uSm6FgI8fIhx45CTAzU1jBgBZ2cIBPjjD8TH48QJnDmDQ4cwbVrznzUwQEurGqamQiTq1PkApHUKfBCVdDj2DexBQUGKLoQwTGUlY2vLAIydXeNn8IuLmUmTGIDR0GBu3361w4pEjI4OAzA3b3ZgsYQLxc8YIUQ1hYUhPR36+oiPR6P15UxNcfw4Bg3Cixd41afIduxAZSU++AAv11clCkcxSoh87NwJAIsXo9lVC9TVwa4Xk5CAjIz2HlMiwQ8/AECT9ZKJAlGMEiIH2dnIzQUAT88W+7i7Q18fDINffwWAx4/x3Xf47ju8fJSuGYcOIT8fjo748MOOrpi8PopRQuSAXUSGx2vtXjyf/+fJPts5IwOLFmHRIlRVtfiRsDAACAigmfZvFLpTT4gcsNObdHTQ4KVSzTAy+m9nDQ2YmwNAS+9/PX8et2/D1BQff9yRpRLOKEYJkQN2tNjOMSPbbehQtL7AGPtGwsWLoWyvyFZ5dFJPiByw70asrETri/Oz49D2vEjxzh3Ex6NbN/j4dER9pCNRjBIiB337AoBMhlZeW80wYN/R3Z73lW7dCobB3LlQ9ZcVKyOKUULkwMbmz0U/T59usU9i4p+j0TbfRfz4MQ4fBo+HRYs6rkTSYShGCZEDHg+ffQYAkZF49qz5PqGhAPD++38OXVsRGYnaWkye3HZPoggUo4TIR0AAzM1RWAhPTzR6H4FEgiVLcO4c1NSwefOfjfn52LIFW7ag
puYvncVi7NoF0JT7NxfdqSdEPoyMEBuLiRNx6RJsbfHRR3B2hqYm0tNx7BgyMsDnIzLyv+vVZ2UhMBAA5s//y7343btRWgoXl7bP/YmCUIwSIjfDhyMpCcuW4fRpREX9ZZeTE7Ztwwcf/LelWzfY2ADAyxeoAEBdHSIiAGD58k6ol7weilFC5KlPH8TFoaAAly6huBi1tTA1xeDBjRcrATBoUDMP19fUYMcOABg5shOKJa+HYpQQ+bOweM2X3Glrt7jqKHlj0C0mQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGCWEEE4oRgkhhBOKUUII4YRilBBCOKEYJYQQTihGCSGEE4pRQgjhhGKUEEI4oRglhBBOKEYJIYQTilFCCOGEYpQQQjihGFUpw4YNCwwMdHNzU3QhhHQhPIZhFF0DIYQoMXqlnXILDQ0tKyubM2fOu+++23TvgwcPdu7cqaWltX79+lYOkpmZuW/fvszMzGfPnpmbm7u5uXl5eWlpacmtakJUCo1GlZulpWV+fv6RI0e8vb2b7j1z5oyHh4eenl55eXmzH5fJZEFBQdu2bZPJZA3be/fuHRcXZ2dnJ5eiCVEtdG20S/vqq6+2bNliZWW1d+/etLS0u3fv/utf/7KwsMjOzvb09JRIJIoukBAlQCf1XdeLFy+uXLny9ttvJyUlmZmZsY329vb29vZDhgxJT09PTEyku1WEtIlitOvS0NC4fv16Xl5efYayXFxcBAKBVCotKipSVG2EKBE6qe/SeDyelZVVo8b09HSpVArAxsZGEUURomQoRruQ27dvs+fsJSUlLfUpLy/38/MDMGrUqIEDB3ZidYQoKzqpVwV79uy5dOlS0/bc3NyGm2Kx+P79+wDYwWZDMTExqampubm58fHx5eXlkyZNOnDggNzqJUSlUIyqgvPnz7enm5WV1bp16wDo6uo22nXy5MnY2Fj2z6amppMnT27ahxDSLJo3qtzYeaMhISHN3lK/ceNGcHBwK/NG6xUXF5eVlRUWFl6/fj0sLKy4uHjq1KnHjh3j8+myDyFtoNGoKnBychozZkzT9vZP/DQzMzMzM+vbt++oUaNmzpxpb29/4sSJ48ePe3l5dWilhKggGmuQxiwtLYcOHQogPj5e0bUQogQoRkkzTE1NATx9+lTRhRCiBChGu7SEhISTJ082aqytrU1KSgLwzjvvKKIoQpQMxWgXcvPmzV69evXq1YudN1pcXDxlyhQvL68NGzZUVVWxfUpLS+fNm5ednc3n8+fOnavIcglREhSjXUhNTU1OTk5OTg47b9TMzCwiIoLP5wcHB5uYmDg4ONjb25ubm8fExPD5/IiICEdHR0WXTIgSoDv1ys3W1lZXV1dPT6/ZvTo6OnZ2dt27d2c3e/bsuW3bNgD1LfPmzXvvvfe2bNly9uzZe/fuATA0NBwzZkxAQMCQIUM65RsQovRo3ij5k0QikUqltFozIa+KYpQQQjiha6OEEMIJxSghhHBCMUoIIZxQjBJCCCcUo4QQwgnFKCGEcEIxSgghnFCMEkIIJ/8P74ZWgJKpAVMAAACwelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAA4gZgPiBkZGDQ0gzcgowcimoQBksUiwwIWY4CxmTrAkowQrXIiNAyzEJMEOF+LgZmBkYGRiYGRmYGRhYGVjYGNnEAFZJ54FUoBk+YH906cpqII4Z8/4qADxEhBbQkIWKHbAHsTeZ9u6FKQOouWAPUxNofJiB6C77SDsYiCbAapGAS4uBgAnfB2tRmsD5QAAAPF6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJx9kkGOwjAMRfc5xb8AkZM0TbKkLcOMEK0EhTuwn/trHJDHtB2N04XtvnzrWzGocRlOj2/8hh+MAeifr5SCeyAic0ZN0B2OXyP6ed9Jp59u43xFRss3+CzJ/TydpePwiZ2zkWqALK0S
J6BHr93dFmwEDKyo1zfKXrimTlbFLRmEjPhQoT9GRwFbloyWSqbExq2PS64VLmFCsKkUCrkKricnATMLev29FszCHcZhsdXXnrtpHHTP9XjdJhfqMDzLRldTy6j+HVtL6tKxg8Xs90m1lrfAufkBkHpy18/7PwAAAACwelRYdFNNSUxFUyByZGtpdCAyMDIzLjA5LjUAAHicZcq7DoMwDAXQX+kIUrCcFwlmqipVLH3skQcqdQOBULvx8U3abl7s6+ObBtKcTuS4SgMZrsuyXKczeYacW043CpxT5MNeNRq8QtXjb5Qrr6/2/5cH7CJGhWAyWghdh041WCqmVIrXanwt831bVkKYl+mYr8u4Xt/z47mBJi3RkZFoyEq05CR68hJbaiUGChLj/gHLeln8f0JKEgAAAABJRU5ErkJggg==",
      "text/plain": [
       "<rdkit.Chem.rdchem.Mol at 0x7f64a9d22730>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rxn_smi = \"[H:1][C:4]([H:2])([H:3])[F:5]>[H:6][O:7][H:8]>[H:1][C:4]([H:2])([H:3]).[F:5]\"\n",
    "from_rxn_smi = ReactionDatapoint.from_smi(rxn_smi, y, keep_h=True)\n",
    "from_rxn_smi.rct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAP+ElEQVR4nO3de1BTd5/H8W9iIAECCBhEQXkYUASt4spqq6BsRwVWrBcKbrHSOiJycaaX6VQ79QK2TqWDZbTj2GJFeaxaa8Wnio7VUhURi+I6tihIYUUCChIkoJCQ6/5xVpcSrv6AXPi8/kp+OTnzZcy85+ScA/L0ej0BAMDL4ht7AAAA84aMAgAwQUYBAJggowAATJBRAAAmyCgAABOBsQcA6KywsLC+vt7Pz2/SpEmGryoUinPnzhFReHi4SCTqbid6vb6goKCsrOzJkyeOjo7BwcGTJ08exKFhONMDmJjQ0FAi2rJlS5evVldXcx9dqVTa3R7y8vJ8fX07fdQXL14sk8kGbWoYvnA0CpYmNzd3+fLlarU6ODh4yZIlDg4Ot27d2rdv3+nTp5cvX37p0iUej2fsGcGiIKNgUXQ63UcffaRWq1NSUrZu3fpiPSQkZMWKFfn5+efPn+eOdgEGCi4xgUXh8/mXL1/es2fPli1bOq5HR0f7+PgQ0W+//Wak0cBiIaNgaUaPHp2UlGT4zZ3LaENDgzGGAkuGjIJ5a29vX7du3bp168rKynreUiaTEZGLi8uQzAXDCM6NgokqKSk5evSo4fqTJ086PlWpVJmZmUQUFRXV5Q1SHJlMVlJSQkRz5swZ6ElhuENGwUTl5OTk5OT0uplAIOAuGY0aNaqHzb7++mulUunu7h4eHj5gIwIQETIKJmvu3Lnz5s0zXG9padm1a9eLpzY2Ntzd+D0oKir64osviCgtLU0oFA7snADIKJiokJCQ1NRUw3WpVNoxo72qqKiIjIxUq9Xx8fErV64cuAEB/g8uMYElKy8vDwkJqa2tjYmJ2bt3r7HHAcuEjILFysvLe+2112pra6OiorKzs/l8fNphUOCDBZZp165dYWFhTU1NKSkpx44dEwhw/goGCz5bYN6USmV8fDwRbdiwgfsbTi0tLfHx8ceOHbO3t8/Ozl62bJmxZwQLh4yCeVOr1YcOHSKi2NjYyZMny+Xy6dOnV1VVEZFYLN6+ffv27ds7bu/k5HThwgWjjAqWChkF82ZlZbV06VIicnV1JSK5XM41lIgePXr06NGjTttLJJKhHRAsH0+P/6ceTExLS4tKpbK1tbW1tTV8VafTcb/I5OzsbHjVSKfTNTc397BzHo83cuTIAZwWABkFM6DVank8Hi61g2nC5xJM3alTpwQCAffNHcAEIaMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDABBkFAGAiMPYA0A8///xzSUlJYGBgaGio4asajSYtLY2I1q5d6+rq2sN+FArF4cOHZTJZWFhYQEDAYI3bNxqNprCw8MqVK/X19Tqdzs3Nbf78+bNmzeLxeH3fyf79+2UyGRF5eHisXLly0IbtWpVSmdvY2N2rQY6OAWKx4fqR+vpGjcZwfYqd3X+MHDmQ88EgQ0bNyY8//njkyJHk5OQuM6pSqTZt2kREERER3WW0oaEhKytr9+7dDx8+JKKmpibjZvTs2bPvv//+X3/91XFx8+bNs2fPPnDgwMSJE4lo5syZOTk5Y8aM6W4nZ86ciYuL4x4HBQUNfUal7e0H6+q6e3WkQNBlRg/W1z9Rqw3X48eORUbNCzI6XKhUquTk5O+//16pVPL5fLFY/OzZM+OOlJ2dvWbNGq1W6+7uvnr16oCAAIFA8Oeff2ZlZRUWFoaEhNy+fVsikbi5
uS1btqy7nTQ3N69bt46IwsLCzp07N4Tjd2Y/YsTG8eMN1yfZ2houavV6uVpNRCn/+IeIz+91ezBlyOhwYW1tfefOnaCgoIiIiKioqI0bNx46dMiI85SXlycmJmq12oULF544cUL8/HhtyZIlH3744cqVKxcsWCCRSHrdz4YNG2pra5ctW7ZgwQLjZtSazw91du7jxo1qtY7Imsdb5OLSj5MXYJKQ0WGksLDQ2CP8v/T0dIVC4ebmduzYMfHfv/Pa2tqePHmyLzvJz8/PzMy0t7ffvXv36dOnB2fSQSFTq4lolJUVGmoBcKUejECn0x0/fpyI1q5dO/JlzwO2t7cnJCTo9frPPvvMw8NjQAccdA1cRq2tjT0IDABk1GJptdrg4ODg4GCTOgjllJeXy+VyIlq4cGGvG8fFxYWHh584caLTempqamlp6bRp05KTkwdlyoFT0toaU1oaU1qq0um4lUYuowLBE42moLn5bGPjtZaWtuevgnnBl3rzU1RUtHnzZsN1zd/vntHr9QUFBUTU2P29OJ3U1NRcu3aNfcIeuLq6zps3r6amhnvKXYvv2eXLlysqKl5//fWOi3/88Ud6ejqfz//2228Fgq4/xtXV1UVFRf0az2XOHJ2NTd+3dxAIZtrbc4+farUfV1Z22uDfHRyiJJJWrba8rY2ItM/XuS/1v7e0hN2+/aKdtnz+225ua9zcRvTnZi8wOmTU/BQXFxcXF/e6GZ/P/+CDD4jI29u7j3u+du1adHQ003C9CQkJuXjxYnNzM/fU/nmDevDWW2/V19d3vDFLq9WuWbNGrVa/9957s2bN6u6N+fn5q1at6td4kTduVPUnYf62tv/08+Meq3S63+TyThuIBQIichcK48aMISKr5zv3t7PzEoncrK3/zd5eYmXVpNEUNjffePo08+HDJrV6Q1dX/MFkIaPmJzQ0dPXq1YbrKpUqNjb2xVM+n//VV1/1a8/jxo178803Wefr0ZQpU4hIKBRyT1UqlU1vR3/btm3rtJKRkVFcXDxmzJjU1NQe3ujp6dnfH2eaUOhtZ9f37cc9/0GIyFEgyPDx6bSBi0BARB5CYcLYsR3XgxwdgxwdO66sGj36x8ePv5RKjzc0LJdIJvTnoBiMCxk1Pz4+PitWrDBcb2tr65jRl/Dqq69yV34Gm/PzG4Pq6uoc/16TXlVVVaWkpBDRnj17en4vd2r4ZWfsNwGPN7U/CTYU5eqaXV9fr1JdlMuRUTOCjIIRTJkyhc/n63S6mzdv+vr69uu9iYmJra2tIpHo6NGjR48efbFeWVlJRGVlZdHR0VOnTuV+ocu88Ii8RKJ6lapepTL2LNAPyCgYgYODw4wZM27cuHHkyJGYmJh+vffOnTtEpFQquzxwlslkx48flxucozQXTzQaInLq5qIZmCb8a4FxJCQk3Lhx48yZM7m5uREREX1/45kzZ1RdHaz99NNPO3bsCAgI+O677xwcHAZu0kFRrlCI+PzxHU6tElFNe/v/KBRExHhyAIYYMmqxtFotdxU7IyNjKE8R9tE777yTlZV19erV6Ojo9PT0uLg46+f3oj9+/PjgwYMTJ05cunQpEb399ts1NTWJiYncGeFXXnmlyx1ev36diMRi8YwZM4bqh+irP549S33wgIiO+PkJ+fxqpTK5vFxLlDh27H86O9uNGEFE//306ecPHmj0el9b2yD8aRKzgoxaLL1ef/PmTSJ68Q33+vXrYWFh3OPW1lYi2r179759+4ho9uzZubm5QzneiBEjTp48GRERcf369eTk5I0bNwYEBNjY2Ny/f//+/fsajWbu3LlcRouKiioqKhYtWjSU4w0shU73QKkkIu4WUVdr62li8SW5PK26eqdUOtra+qlG06LVEpGnSJTh7Y3fijEvyKg5cXR0dHV17e4bK4/H4/4+npWVFRHx+fxPP/2UurnF3c7Ozs7Y3xwlEklBQcH+/fsPHDhw8+bNK1eucOvjxo0LDw9PSkrinr777rsN
DQ2BgYE9700oFDo5OfXlRtQBZ8PnuwuFzt2f0BwnEiW7u9Pz+0ZFfH66t3dhc/O/GhtvPX1a295uzeNNsrVd6Oz8XxKJNR8VNTM8vV5v7BkASKFQyGQytVo9atQo0z+zObDUer0Vfm3JnCGjAABM8PUBAIAJMgoAwAQZBVNXUVGxY8cOwz+UB2AikFEwdXfv3v3kk0+ys7ONPQhA15BRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmyCgAABNkFACACTIKAMAEGQUAYIKMAgAwQUYBAJggowAATJBRAAAmAmMPANCLCRMmfPzxx/7+/sYeBKBrPL1eb+wZAADMGI5GweRkZmaWlZWFhoaGhoYaviqXy7dt20ZEW7dudXR07G4njx8/zsrKunv3bl1dnZub28yZM2NiYpydnQdxbhi29AAmhqvnli1buny1urqa++hKpdLu9vDNN9/Y2Nh0+qi7uLhcuHBh0KaG4QuXmMDSZGVlJSQkCIXCnTt33rp1q6ys7OTJk1OnTm1sbIyMjKyrqzP2gGBp8KUeLE1eXp5QKLx48WJAQAC34uvrO2fOHC8vr5aWlpycnKSkJONOCBYGR6NgaQ4fPnzv3r0XDeVIJBJvb28iwtEoDDhkFCyQp6dnp5X29vbKykoi8vHxMcZEYMmQUTBvbW1t/v7+/v7+v//+e3fbtLe3r1+/vrW1dfz48VFRUUM5HgwHODcKJurs2bMNDQ2G68+ePev4VKvVlpaWElFra2unLfPy8s6fP19bW3vp0qXa2trAwMAffvjB8Ao+ACNkFExUcXFxcXFxr5sJhcLPP/+ciLhTnx1dvXr1yy+/5B6LRKLIyMjRo0cP+JwA+C0mMDlhYWG//PLLqlWrYmNjDV9taGiIiYkhIqlU6uHh0cN+mpqaZDJZXV1dSUnJrl277t27FxgY+Ouvv/Zw0z7AS8DRKJgoLy+v+fPnG65LpdI+7sHJycnJyWnChAnBwcGxsbHTp08vLi7OyMhISUkZyEFh2MMlJhgW7OzsFi1aREQXLlww9ixgaZBRGC5cXV2JqMvLVgAskFGwNPfu3duzZ4/hen5+PnV1JQqAETIK5q21tdXLy8vLy6uwsJCINBrN4sWL169fn5SUJJPJuG2USuWmTZvOnTtHRKtXrzbmuGCJcIkJzJtOp6uqqiIihUJBRAKBIDMzMzIycu/evZmZmT4+PtbW1pWVlW1tbUSUnJwcHR1t3IHB8iCjYHI8PT39/Py4U5mGrKys/Pz8uAdEJBKJdu7cSUQTJ07kNggJCSktLU1LSzt16lRFRYVWqxWLxWFhYYmJiW+88cZQ/RAwjOC+UbBkOp2ura1NLBYbexCwZMgoAAATXGICAGCCjAIAMEFGAQCYIKMAAEyQUQAAJsgoAAATZBQAgAkyCgDA5H8BhAgcb+74aH4AAACLelRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJx7v2/tPQYg4GdAAFYgZgHiBkZGDQ0gzcgowcimoQASlWCBCzHBWcycYElGCVZuBkYGRiYGRmYGRhYGEZBZ4m4gGSSTD+yfPk1BFcQ5e8ZHBYiXgNgSErJAsQP2IPY+29alIHUQLQfsYWrEALyjFrvi+CrHAAAAvnpUWHRNT0wgcmRraXQgMjAyMy4wOS41AAB4nH2QSw7CMAxE9znFXKCVm8+iy7YpH6GmEhTuwJ77qzHImFCEk4VtPY8zMeA4x9P9gXfYaAxAf27btrg5IjITOEE/7o8Jw9L10hnma1ouCPB5Ip+S7JZ5kk6DA6qmDsQBqukraQS0GLRbbUEvoMuKOr5RtsJ53qyKW9IJGbBToR+rg4BjioW5l91+TlHt8rFqKhe6yD1Lry/k
slD/1OJaPj3nZgXXjFcxHOdHzwAAAIN6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJyL9rAyjI12tjKJ1Yj2sDKK1QRRxrGa0W5WprEKNRq6hnqmOgY61gYQAsQDUmBRa4iUpk5iSX5uQFF+gZWBXm5+jiOQ55tY4Feam5RapGdoZYgpaGJlhCloZGWMKWhsZYIpaFoDADOaN9kqtHGTAAAAAElFTkSuQmCC",
      "text/plain": [
       "<rdkit.Chem.rdchem.Mol at 0x7f64a9d228f0>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rct_smi = \"[H:1][C:4]([H:2])([H:3])[F:5]\"\n",
    "pdt_smi = \"[H:1][C:4]([H:2])([H:3]).[F:5]\"\n",
    "from_tuple = ReactionDatapoint.from_smi((rct_smi, pdt_smi), y, keep_h=True)\n",
    "from_tuple.rct"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Lazy Molecule Datapoints"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`LazyMoleculeDatapoint`s are similar to `MoleculeDatapoint`s, but the `rdkit.Chem.Mol` object is not computed until the `mol` property is accessed. This allows for more efficient memory usage when also using `cuik-molmaker` for featurization. The datapoint stores the featurization options used to create the `rdkit.Chem.Mol` object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "datapoint = LazyMoleculeDatapoint(\n",
    "    smi, _keep_h=False, _add_h=False, _ignore_stereo=False, _reorder_atoms=False, y=y\n",
    "    ) # `Chem.Mol` not yet computed\n",
    "mol = datapoint.mol  # `Chem.Mol` is computed and cached"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "chemprop",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
